{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import scipy.sparse as ss\n",
    "import pickle\n",
    "import scipy.io as sio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "用户： 30755\n",
      "歌曲： 359966\n"
     ]
    }
   ],
   "source": [
    "# Raise pandas' display limits (cell width, line width, column count) to 1000 for printed frames.\n",
    "pd.set_option('display.max_colwidth',1000)\n",
    "pd.set_option('display.width',1000)\n",
    "pd.set_option('display.max_columns',1000)\n",
    "\n",
    "# Load the training interactions, then report the number of distinct users (msno) and songs (song_id).\n",
    "df_train=pd.read_csv('../data1/train.csv')\n",
    "print('用户：',len(df_train['msno'].unique()))\n",
    "print('歌曲：',df_train['song_id'].nunique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "去掉无用的特征后训练集维度： (7377418, 3)\n"
     ]
    }
   ],
   "source": [
    "# Preprocessing: keep only the user id (msno), the song id and the target label.\n",
    "# Per the original note the raw columns are msno, song_id, source_system_tab,\n",
    "# source_screen_name, source_type and target; the three source_* columns are\n",
    "# treated as irrelevant here and are dropped.\n",
    "df_train=df_train.loc[:,['msno','song_id','target']]\n",
    "print('去掉无用的特征后训练集维度：',df_train.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                               msno  total_rating\n",
      "0      ++5wYjoMgQHoRuD3GbbvmphZbBBwymzv5Q4l8sywtuU=           293\n",
      "1      ++AH7m/EQ4iKe6wSlfO/xXAJx50p+fCeTyF90GoE9Pg=           141\n",
      "2      ++e+jsxuQ8UEnmW40od9Rq3rW7+wAum4wooXyZTKJpk=            76\n",
      "3      ++xWL5Pbi2CpG4uUugigQahauM0J/sBIRloTNPBybIU=             1\n",
      "4      +/SKX44s4ryWQzYzuV7ZKMXqIKQMN1cPz3M8CJ8CFKU=            82\n",
      "5      +/USTSczbe/vrPPatLFT8TPKNBt0J5ValJhfr3vNALI=             0\n",
      "6      +/UwoUi5+rNj/F6RO6gMrMhOy0oTzs90MWKVNZs4+Wg=            96\n",
      "7      +/tv7+daiSsEEBhNDnQZL32wdfmr2GSYmX2aQPGONRY=             3\n",
      "8      +/vppV5BoMKMNAk7Bej5rjhKeE6VqrRjUv8EYXoAjyM=             2\n",
      "9      +0+XaewkS3za58vHwKCVGwRYRvjig4lTtKDV/tOkzbU=            49\n",
      "10     +06pc1V8QfywrGOTNln/7X7Md8dA8fVOIU37n2M5mv8=           349\n",
      "11     +0B4aHABar5ltaWDG4M6KCzNxpD6wnsvZpvh+o9KgtE=           116\n",
      "12     +0CxUZKWtmqPKlW+2VF+hY2rfq+Dh9JM/AqoZJnCr4E=            21\n",
      "13     +0U8n45rrg6b5WCy3wpqOUYCwZyLqecr3lux9K6ahMk=           247\n",
      "14     +0e0lK2leRcjxHEHKxIsaVPA+kMIl7m7cpUJDsVvdtQ=             1\n",
      "15     +0e12C+p9dzDbOvKjt8eElKH9yZPshAstxjm60XFgSM=           153\n",
      "16     +0fIn7dpNl78+g/0mdltKgWRW9sjv4HjTgrB+RJy1EM=            15\n",
      "17     +0fsyPeueRG/vv7UWK0vvhaRK8nRU0P2yK6UrWUvk8o=            31\n",
      "18     +0qp8s9V3eIIPGNZpQzmFNOetFXHApbV7WGgVXcisCY=            75\n",
      "19     +1S8kSyg/yHej4r0ljM3/SiemWL11TWAzKtCNtCBk5Q=           245\n",
      "20     +1a0wTElsAMdYysWZk1uBxeLnjxIc4vB2EznisNlpVw=           117\n",
      "21     +1oEQNa1cLpYIqM4+yDwc78kv8dCJWDjkixPPbNRPSk=            32\n",
      "22     +1yc8TkVhgxT0sW7QRrPXUmX4Bx3KfKKsiVyWrlHnF0=            15\n",
      "23     +2/1fWiUCPA3rRUHlTIKZIr/ZvrxjC3bP1pK69mdacg=             2\n",
      "24     +22Q6EpFwjgJhiiGWz7GQUiq5yu0adEEZWH8j/fj19w=           351\n",
      "25     +2PpOOHUOBpXR8LWNF/Mewun185agG3+g9BnDMWQ47Y=             0\n",
      "26     +2XBp2p9atTpyVPWac3SEEW41i8Xrnl7Utl/h8T4iOo=           333\n",
      "27     +2dukLzY+E4IVHMJPFcjBu21pcSArwiZyWG2w5rGUoI=           603\n",
      "28     +3+6UDIIlqrd+JwTUBUX/6eQxl4bvbF3O3rsqt/gszs=            50\n",
      "29     +3RfkckBHS0oQrrjJj7p69bdwJak+qzKq0fhFaeXUxA=          1169\n",
      "...                                             ...           ...\n",
      "30725  zw30zrUQUtiWnOYVEj4aMnLb13ENvOYtnnWq3bS9Yl0=           397\n",
      "30726  zw82FysqQouUkYU8/7hcrRkQ2ufoU0dPtSIrsF4F+yE=            42\n",
      "30727  zwlhmGn0ESVwIt2JVa6GLBqzbFIugFgwLZDVBRWHJrc=            73\n",
      "30728  zwmC9X0MdW+2o14b0IGpo+dJ+6O9PnextNfmWjEJjqA=           305\n",
      "30729  zwnnOtHeooUKwmZC2XN5B/1aTi97HN8iMGpzX5Iq16o=           279\n",
      "30730  zwrhOLzmWFgPcd8DyeKAlQ4wYv7QFAem9JDDPzNlQYw=            53\n",
      "30731  zwrlV6QbmZqX2okdjRMRBc9bzxD30mcXbkAJ/096rVk=             0\n",
      "30732  zwv9PrDsEvmWWRj+FOWuF5wYcpcAdhC+qUzDj7RG5WQ=           159\n",
      "30733  zwx8itTL3eFLViVDTA7CBKmsqLz5u+BsF1lYH/oo+y8=             0\n",
      "30734  zx1csbW27owkvdvBRmWIWlLvRXLrsvoWjWWn9v/7Xbs=            17\n",
      "30735  zx51OvMA6lJ+iNxD6lZnUppO8ZKHhuKBIAgGXAObgqQ=           459\n",
      "30736  zxHTZQZQ9wUdnNDSCB5O9B4ePgIThkEucEF9s8DumTs=            16\n",
      "30737  zxKir3gvnkDncSSrX0zBk7OJJ2zZ9EWS7TEUYDEg+F0=           102\n",
      "30738  zxMVg3l0frfflcndzaKZwb9qhEXqzpouqIdwOwiPRuA=           172\n",
      "30739  zxP+bamCyM7YEJLAmgcHtIfCtVW0jroJLZX8PAUmL7E=            76\n",
      "30740  zxXgZXulKy2AI2nhyYU/DjdCxPKKzYnhEiqv7x7/g10=           421\n",
      "30741  zxkOKlYxQAJefguoHUfGcAsM2zj4hoUH9fjutb/3+98=            34\n",
      "30742  zxp9DaQYuxk471IbGC5mI+ZyhoU/lbfw4cm1cZgHyPE=            78\n",
      "30743  zxyFUnD5Dxv8HMn9Ric1Qp6Q2WfvTpT270Ot6zj/7TI=            60\n",
      "30744  zy1OTzCnxegYltHBw1D1pZ8OH41xuwN+N6a+YMB0pwk=            99\n",
      "30745  zyC3oBxuljEDumDnxoCfDnoHnu4qq0bkhUwKRRPneDo=            15\n",
      "30746  zyCQN/h38OmzYOsR6ecE+b7GFkO2eCHoxT1tXoFdUjA=           131\n",
      "30747  zyUP22sI9X7z7L6eBsMtC7bK32n4fjE12pgms7NWL6U=            36\n",
      "30748  zylzgLcmfGqcEZCkp0DtELQ6wwVsIOjsJPxCGKdR/2w=             0\n",
      "30749  zzH8+kEnDu02NMWBPCjio6wOujBZUs7y9o7AWWcvIGo=           125\n",
      "30750  zzTnhToV1XfDIsDJ1YaXM3Q05xV54T8Q0zTJkakXsks=             0\n",
      "30751  zzZBJUYXrb168A4Ff4sA8L2iOH0x4ciKdD6WkV53XaE=           113\n",
      "30752  zzompfSaMamqvjyCMYvgUBwYrxh8fHE40z3f73CQoak=           229\n",
      "30753  zzqc2ja7z10FtSpagYVcAZXg/gPRq7wcDZuNFj+zJSU=           113\n",
      "30754  zzzRi5ek1YCKTGns8C77xwAutE05PAPmz8T/pIIQhzE=             2\n",
      "\n",
      "[30755 rows x 2 columns]\n"
     ]
    }
   ],
   "source": [
    "# Feature engineering.\n",
    "# Per the original note, msno and song_id are hash-encoded identifiers.\n",
    "# Sum target over each user's rows to get one total_rating per msno\n",
    "# (a per-user total, not a per-song score).\n",
    "df_user_rating=(\n",
    "    df_train.groupby('msno')['target']\n",
    "    .sum()\n",
    "    .reset_index(name='total_rating')\n",
    ")\n",
    "print(df_user_rating)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "用户订阅过的音乐，及总和：\n",
      "                                                  msno                                       song_id  target  total_rating\n",
      "0        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  BBzumQNXUHKdEBOB7mAJuzok+IJA1c2Ryg/yzTF6tik=       1          2791\n",
      "1        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  3qm6XTZ6MOCU11x8FIVbAGH5l5uMkT3/ZalWG1oo2Gc=       1          2791\n",
      "2        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  3Hg5kugV1S0wzEVLAEfqjIV5UHzb7bCrdBRQlGygLvU=       1          2791\n",
      "3        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  bPIvRTzfHxH5LgHrStll+tYwSQNVV8PySgA3M1PfTgc=       1          2791\n",
      "4        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  XUz4Z4wPPChz+OIwkwj7HJ8teIjW3rEMdEATM80rDxM=       1          2791\n",
      "5        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  +h+uIXLQyXHuCdwAY5oT3T302vt0IdQJtux0APXhriw=       1          2791\n",
      "6        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  +LztcJcPEEwsikk6+K5udm06XJQMzR4+lzavKLUyE0k=       1          2791\n",
      "7        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  rdNg0FiLBStCei8h1/h2kwv8IS97ZgONQkb/kXGAvqM=       1          2791\n",
      "8        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  WRuHbl8CJbAuzUb4f/88Ly6IXif+IVVWolkp33Nm8wI=       1          2791\n",
      "9        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  NWrFxxzjzsSolyGnnLWlkRrMUIzbxhKesaJHNOp8e80=       1          2791\n",
      "10       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  eY3mod6Vs39GLjL86RwYG+6jDkjYUB5AGjbPEuqJmQM=       1          2791\n",
      "11       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  nt4dYk1vQyKPzun5YtaRpLB9xWOWIU3QKiZV3aTOJYs=       1          2791\n",
      "12       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  BnjOmObP+8X8Yxk3tPHy1309L++jhPWBFD8QXCYJsT4=       1          2791\n",
      "13       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  EUEMfptvZVeqhSQI6PzZeE/dX34a4fE1AnbJNyuDPjk=       1          2791\n",
      "14       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  yPMxRWxhG2gZj2BV1mzTW/faIoigIh1F9gWtXBslIOg=       1          2791\n",
      "15       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  e8qPmXjLP6RwXiYsrQB2Z3gRSS34LNCoYFR7iYznvOs=       1          2791\n",
      "16       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  YK5U5FaNxjp4ICossGxITj7uxPEBSOrzD/ogfvujcXs=       1          2791\n",
      "17       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  NOZGiW7PvfAlCrlciaZpmYMpXM8OiJMnLjqUfna7vQM=       1          2791\n",
      "18       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  IKMFuL0f5Y8c63Hg9BXkeNJjE0z8yf3gMt/tOxF4QNE=       1          2791\n",
      "19       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  81mK2AN53Wxo7lKT15YL1lcCYdwmuZ/cSnmDiuwG+Uc=       1          2791\n",
      "20       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  p9Vg2zk7w856BnMwxQYfqHp/DR7T+8TLqA6ZdA7s+MA=       1          2791\n",
      "21       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  +lmBkKRxox8E2SdS6gdyU+DaGpf5DfNT94qyGo+udPU=       1          2791\n",
      "22       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  RLpgMQTJp+tVSJrTH+0CiQXFqDYiTA/6XFcudU8r/gg=       1          2791\n",
      "23       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  koF2RGY1ZmpeylEgkSIEl+ee4Cs7F3Vkf6PnV/gHkb8=       1          2791\n",
      "24       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  R335aDVP9ZQtRVczBZsL42tzPTruFj6cdmZwpjX/U9A=       1          2791\n",
      "25       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  q3ciI9hdndWhjjP0t6aEUtGTSfwOYH5WwUi6DTXY+Vs=       1          2791\n",
      "26       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  WSBiRTkhNB9UjO9pP6a+NrD8UnkaKq9VHPFlSHuMVSE=       0          2791\n",
      "27       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  vKnUal7UGXSL5tJiSMCvd9tZzQxUccL5flhLugrLCaA=       1          2791\n",
      "28       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  GSZ0gIf6RxyHDRCSxVU4MAzzOTCikoGY9Cp1pI6xiJo=       1          2791\n",
      "29       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  Ru7n8Xw2s8LGDsgDhyzWqCWQRWQW9KNPY9qMOFAf5x0=       1          2791\n",
      "...                                               ...                                           ...     ...           ...\n",
      "7377388  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  as7bXjxE++S9bt2J+xLlO3zebSQf/xWpP3ayX36RPsg=       1             6\n",
      "7377389  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  +qUbpr7twjsFKXd4DdV7q+XzkAMOCSSWlLh9T4/qH7I=       0             6\n",
      "7377390  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  Irmfofy3g8p4ZpkBK+o+bcixbsNJ5tqlm/GJYYrDqGM=       0             6\n",
      "7377391  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  nL4V8M8ebB4nSZBL2WUflU4gAWKzc+48Rbh8Y8s80E8=       0             6\n",
      "7377392  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  DWwk7s1MeQc0W8lBVZciEokgrQ/Pfloy4ofEgchCiwc=       0             6\n",
      "7377393  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  nO6o2gW+V7ZamFfzdbbchzESnOE3vFOTZkCvYd7eJd0=       1             8\n",
      "7377394  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  Xpjwi8UAE2Vv9PZ6cZnhc58MCtl3cKZEO1sdAkqJ4mo=       1             8\n",
      "7377395  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  uzU6ntNZT60j0CqFNXTpF6Oo8W3w5UwU/ANEODidW04=       1             8\n",
      "7377396  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  8YUHzpRNxYkj2y99bmbmCI7tQUgESdRzTThSPovZ3Zw=       0             8\n",
      "7377397  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  XkTE3XVEfeBh/AYg2DB4zi/GXLKZl7w1UNqT5X2AK8E=       1             8\n",
      "7377398  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  iEQA6d2yx8JtQLqT5wVEgWgSQuN+p6lUQsGJx4FrC2A=       1             8\n",
      "7377399  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  podlycp3c6tOEDw7q8iQBVgvFjiMpkWWXTlXljGdbTY=       1             8\n",
      "7377400  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  z5ps7i1JPniXcrpFgXsdNTpcnT15e1PNUjhojniJq4c=       1             8\n",
      "7377401  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  ayL6eLHKSrmX4smFi1KbGMYrWI9EwsiErGjWUJ9CTrQ=       1             8\n",
      "7377402  PpGtbkm587Wdk+87fog2IS/1LTP9gNRpOdyCLY/Bm4I=  lp7zKhZ+otpqMRcJ5Lk6p2Kc3BStTGgZKtNRPLMEDqw=       0             1\n",
      "7377403  PpGtbkm587Wdk+87fog2IS/1LTP9gNRpOdyCLY/Bm4I=  CsqfeIr3/stOaLTq+Z+Yt6VnPSqPWS2wxxnCKJL86oM=       1             1\n",
      "7377404  moJZ3JNvEw5P27feIGVz5/O3TSDttPKwM1Y9XU9u33k=  ZcKgNis1AP1LA0sdtIddrtk7P04iiJzJrXvwXdT/X3Q=       0             0\n",
      "7377405  1Y/5t7/lVxT6scW1U4FtrafFnHe5HLr0793S4cNrXyM=  M9rAajz4dYuRhZ7jLvf9RRayVA3os61X/XXHEuW4giA=       0             0\n",
      "7377406  4N9yZKANKrn3HQIBhoVhcTEhyyRceozpTcnnqsQ+gss=  0m0HHeb33rsWnLA3N13G2GKPYx3pgBN2JLoutqR8Mi0=       1             2\n",
      "7377407  4N9yZKANKrn3HQIBhoVhcTEhyyRceozpTcnnqsQ+gss=  5veDaknFWh0v1vD1hYS6ormlAuz4lcqgp2QtXnU5jFw=       1             2\n",
      "7377408  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  M9rAajz4dYuRhZ7jLvf9RRayVA3os61X/XXHEuW4giA=       1             3\n",
      "7377409  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  ih58jIWgit7qPy3VpvbM6E7AIvs57ylCHMVpIl1T+Io=       0             3\n",
      "7377410  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  n+FLlTL+LrNTa6xpoGkESEzlUMb/hPEM1nLq2U6fEp0=       0             3\n",
      "7377411  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  0b8M2FLEH176y/ng1rK9ftUYDYnA+1NdoCOUeuyBMtU=       1             3\n",
      "7377412  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  e/upMhPFcXl3Rxq0Us1abr0FcThlZOeqKJhBobmltBU=       0             3\n",
      "7377413  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  yk2hZS4SA2yit4orR06WHYDkkXg5+E77bA7Y533znZ4=       0             3\n",
      "7377414  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  CfOyUSvBAIGhsUEcZ6hTnWLLaQrDtzVmoIdmIMXBOTA=       0             3\n",
      "7377415  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  CsqfeIr3/stOaLTq+Z+Yt6VnPSqPWS2wxxnCKJL86oM=       1             3\n",
      "7377416  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  wxghAK3cq1rb0Kkyx6dSxzKGYadE8TQWF9L6dq9B1Is=       0             3\n",
      "7377417  oHMpaCQRI6fzq/jamfTvv5tFfukkE9/VERi7/CfQ9M0=  xGZ1Qfh7+MA5fUF0Emnk3RU5EMu4iISek5F/ky9MIZ8=       0             0\n",
      "\n",
      "[7377418 rows x 4 columns]\n"
     ]
    }
   ],
   "source": [
    "# Attach each user's total_rating to every row of that user.\n",
    "# The join key is named explicitly so the merge cannot silently widen to other shared columns,\n",
    "# and validate= makes pandas raise if df_user_rating ever holds more than one row per msno.\n",
    "df_train=pd.merge(df_train,df_user_rating,on='msno',validate='many_to_one')\n",
    "# The totals now live in df_train, so the helper frame is released.\n",
    "# NOTE: df_train is reassigned and df_user_rating deleted here, so this cell cannot be\n",
    "# re-run on its own; re-run the notebook from the loading cell instead.\n",
    "del df_user_rating\n",
    "print('用户订阅过的音乐，及总和：\\n',df_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove every row that belongs to a user whose total_rating is 0.\n",
    "# Per the original note, a zero total is read as either a user who is new this month or\n",
    "# a user who was active last month and has since churned (an interpretation, not checked here).\n",
    "# Example seen in the printed frame above: the row at index 7377417 has total_rating 0.\n",
    "# A boolean mask filters the rows directly, without building a list of index labels first.\n",
    "df_train=df_train[df_train['total_rating']!=0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                                 msno                                       song_id  target  total_rating  fractional_rating_count\n",
      "2907879  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  LPJOSGYnGUFOXJkKPrmEXJ7iMb3mvY8R2ofkmgU71LA=       1          3288                 0.000304\n",
      "2904831  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  TNX7iJ97he2nSjHX/EqW5BYXIYoQjwjf2Hg/CHYqR80=       1          3288                 0.000304\n",
      "2904824  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  rOWFaCuk3hjksL6KjP7fNvquyV/kbP7z5yS+pCm20KA=       0          3288                 0.000000\n",
      "2904825  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  hKs8gwK7qKNeXTvzZ3U8y8aDiBsE0y7uksji0TPbnrQ=       1          3288                 0.000304\n",
      "2904826  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  m/xwFbdL6o4DK0xAo+PgYVGptZIJd6zlq+IsX1LLg8U=       0          3288                 0.000000\n",
      "2904827  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  V5vnSrmStEWpy2wuA/VqDbaSR99xnf6sQQapvSgysb4=       1          3288                 0.000304\n",
      "2904828  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  mY7WaXacJX2qsSWI7L6g6mVsO+BLtELHYJHOxlqSra4=       1          3288                 0.000304\n",
      "2904829  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  AJQOsWR+d8TYfx8K3IngAs9/GSQE5ujDqBx/8PYm6zo=       1          3288                 0.000304\n",
      "2904830  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  aPfpRsLCJC6Hw0S9bN9DGQ5evMIX6lWA1rEBe4B1inE=       1          3288                 0.000304\n",
      "2904832  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  Gd6vURGU2Wy6comgtbXMJeKsYfRmscOWuMLOAd1y/Oc=       1          3288                 0.000304\n",
      "2904727  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  c/TSW5nx2McXDqxxSwARQ4ZOlLiqd7x2EAqQJMnKcxs=       1          3288                 0.000304\n",
      "2904833  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  X6XRIogLxotLfw33THblECtGSz7SvoBSqrRAKJOYuLk=       1          3288                 0.000304\n",
      "2904834  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  V64L/qsKWAPuFskaNx/00Q+wE0oH4c+r0973VY766eg=       1          3288                 0.000304\n",
      "2904835  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  UswLRvp+4oB07MrL3J1IweyWCRc/JFRp87WuVfn7DM0=       0          3288                 0.000000\n",
      "2904836  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  eI/fppo6SzfnQ/zAbGxwgLQO3ImeYb/SEBqiRIsuleM=       1          3288                 0.000304\n",
      "2904837  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  68oT2N/+gXsxXrsL1qZjE8rGn6/gfUpPZBPU1AwVHfI=       1          3288                 0.000304\n",
      "2904838  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  NhQfOVnptpW5z5qsP/sN+hVS8TM2iYvdy/ZGVe7K+kM=       1          3288                 0.000304\n",
      "2904839  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  APeke76mPTgaGUQwi/Ws470Az+p6Qmc5rmV+9wHJxPc=       1          3288                 0.000304\n",
      "2904823  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  3GXNYHMa6FtSul+9mqzwzh+86CHRvPjdQ85lfPjtJmw=       1          3288                 0.000304\n",
      "2904822  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  IggQZElxZe8S3/UkjO48O9lBxNG5TX5oWqPXlwbfBuI=       0          3288                 0.000000\n",
      "2904821  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  spWj+Itkejp7q3S9DwgFEDcyULADpiYEut48xX2UTHo=       0          3288                 0.000000\n",
      "2904820  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  A+RNE+nwHzzYdQliLcF1YcaMpmDL91TAbz8xALp4FEM=       1          3288                 0.000304\n",
      "2904805  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  yKYosugIwAtubLZYjcmJ9XmyzQO6M0L2ge+4G3jY4FE=       0          3288                 0.000000\n",
      "2904806  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  e9Ao01jgwLRo66tejtj/gvwP8+GasAyCeFSWM83dldg=       1          3288                 0.000304\n",
      "2904807  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  2qf/a8aJPrAWimn7OSDZg0vnvKj63bPl7tk4GkY/SqY=       1          3288                 0.000304\n",
      "2904808  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  IQpF4wMgtL95jv8Rlred6WSyARVBKWAhABjXavG/ZPQ=       1          3288                 0.000304\n",
      "2904809  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  tn3R1gwM9bQlf65IDR/zNNSQ5lSUFM06PlFmGwp7hdI=       1          3288                 0.000304\n",
      "2904810  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  /dJXliYHwlAmxL43MDR/i3A9K8duqkLZ3rNepCq4bp0=       1          3288                 0.000304\n",
      "2904811  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  y6LdD43w9QfNUl86aqDaY34BT18I1jwxevvXwSoJbaU=       0          3288                 0.000000\n",
      "2904812  o+5RNlSWrzvrphgBNGIo1FLkGxBgyICns6qXj3nS7Pk=  i+tb27qN6/8gxDOowJcQOLn2cO+sM0YjgzSRLZbMcPA=       1          3288                 0.000304\n",
      "...                                               ...                                           ...     ...           ...                      ...\n",
      "6851231  k2r3hcfF4aDjW0YxIrYW2zpUXbLZUfw/qC/yhBVRxPk=  r19/Bykb1yypmn1RnCKit4kibkjcAo8c4hZHJr1eAeQ=       0             1                 0.000000\n",
      "6851230  k2r3hcfF4aDjW0YxIrYW2zpUXbLZUfw/qC/yhBVRxPk=  podlycp3c6tOEDw7q8iQBVgvFjiMpkWWXTlXljGdbTY=       1             1                 1.000000\n",
      "6851229  k2r3hcfF4aDjW0YxIrYW2zpUXbLZUfw/qC/yhBVRxPk=  T86YHdD4C9JSc274b1IlMkLuNdz4BQRB50fWWE7hx9g=       0             1                 0.000000\n",
      "6851228  k2r3hcfF4aDjW0YxIrYW2zpUXbLZUfw/qC/yhBVRxPk=  XU/IIf78hxFd9vuBEE14BAhjf7FVkohkHJf8AZ3BtNI=       0             1                 0.000000\n",
      "6851227  k2r3hcfF4aDjW0YxIrYW2zpUXbLZUfw/qC/yhBVRxPk=  WJyW1Eqg3hXHl2+JB/qMSAQ/3VxqSNVvPniD/ReCo2I=       0             1                 0.000000\n",
      "6851226  k2r3hcfF4aDjW0YxIrYW2zpUXbLZUfw/qC/yhBVRxPk=  VkILU0H1h3NMmk9MQrXouNudGk5n8Ls5cqRRuBxeTh4=       0             1                 0.000000\n",
      "6851225  k2r3hcfF4aDjW0YxIrYW2zpUXbLZUfw/qC/yhBVRxPk=  fLPjn5bodn0e7PrE+Iue4MnTLGL2O/J5ONE2i0v31UY=       0             1                 0.000000\n",
      "2241530  Fa1Z70/2+RW+LUMhCX5NrkRugNZ5biTQmncucQtgQxY=  HDSs2ralcu9lEuBSE/JK0XelYIRLyx0qWBHaW/ckYic=       0             1                 0.000000\n",
      "2241529  Fa1Z70/2+RW+LUMhCX5NrkRugNZ5biTQmncucQtgQxY=  ELqPma0iEHSyErE2F2TEn7UiT4Ci3gug2xv9N6OtjH0=       0             1                 0.000000\n",
      "2241528  Fa1Z70/2+RW+LUMhCX5NrkRugNZ5biTQmncucQtgQxY=  sib9qXeVBisMOq+c06DXA9Ce5Ce7XXgvitzTHwVoSvU=       0             1                 0.000000\n",
      "6602757  cggvIKhBJ0fb1alCDHmzMrzvPWxC7hred329XgUAjkE=  NQKOxs5WlO+OnB1c9u45WFGw1r0MuE9cHWKs7nmU4Y8=       0             1                 0.000000\n",
      "6423283  A5L7ug+tL9jgXvesDuUg6BGn1KoD/oHI9O2iARLggTM=  Q2eLp+f0Y/k4dnQLMoSrpmrPTQpK+YJKB0DqQ2JAB2g=       0             1                 0.000000\n",
      "6423282  A5L7ug+tL9jgXvesDuUg6BGn1KoD/oHI9O2iARLggTM=  n4llg413f4vtMDvpb0XkYvEJyvF8jewSnR720wiAS2E=       0             1                 0.000000\n",
      "6423281  A5L7ug+tL9jgXvesDuUg6BGn1KoD/oHI9O2iARLggTM=  VjczeGkkrPqUpTMS3pg4X6lDOw1Iv6tYfXqqRevIHRs=       0             1                 0.000000\n",
      "6423280  A5L7ug+tL9jgXvesDuUg6BGn1KoD/oHI9O2iARLggTM=  uPrK8bJD3ZhKuVNs7OLa3ga6u2nRdTVfBkFGrqX09h8=       0             1                 0.000000\n",
      "6423279  A5L7ug+tL9jgXvesDuUg6BGn1KoD/oHI9O2iARLggTM=  VGiMlcgm+h6GT9cSiucOoOhdt5XajW6TsuEMlilZ9Eg=       0             1                 0.000000\n",
      "6423278  A5L7ug+tL9jgXvesDuUg6BGn1KoD/oHI9O2iARLggTM=  PxNKRdWDZ1fWY9ezyHNPdYiCTV9KbvwpBck2eJY8qVk=       0             1                 0.000000\n",
      "6423277  A5L7ug+tL9jgXvesDuUg6BGn1KoD/oHI9O2iARLggTM=  jfcnG0J3ElQM1fVOUNgsQS9mrw/EC99eXHxwDN7s2D8=       0             1                 0.000000\n",
      "6423276  A5L7ug+tL9jgXvesDuUg6BGn1KoD/oHI9O2iARLggTM=  doUegDdK60TkLdjAxLbz6IclJZIh+vzwoI8A3EHgJmE=       1             1                 1.000000\n",
      "6602756  cggvIKhBJ0fb1alCDHmzMrzvPWxC7hred329XgUAjkE=  H0ff3F9JCs77wtCEWTc5Z81V0dAYFLPqLN0RF1UyE1Q=       0             1                 0.000000\n",
      "2241527  Fa1Z70/2+RW+LUMhCX5NrkRugNZ5biTQmncucQtgQxY=  al/5dJXXLJG1kHOaV9Pc8Jv2+/G1RcHa8kudLIG0UbA=       0             1                 0.000000\n",
      "6602755  cggvIKhBJ0fb1alCDHmzMrzvPWxC7hred329XgUAjkE=  zkPytOjMB6HCU9wjrEWQ/ls9Z1t2gLhV9PV3mASGOkg=       0             1                 0.000000\n",
      "6602754  cggvIKhBJ0fb1alCDHmzMrzvPWxC7hred329XgUAjkE=  85vLXDLjLfx4Z0Xs2Q9LMCy27L/5PpBOwtCGU4avCxE=       0             1                 0.000000\n",
      "6602753  cggvIKhBJ0fb1alCDHmzMrzvPWxC7hred329XgUAjkE=  /cZa6xtaZg0SVuOulkMefSOHCc/oSTX20KEa1vK8bKI=       0             1                 0.000000\n",
      "6602752  cggvIKhBJ0fb1alCDHmzMrzvPWxC7hred329XgUAjkE=  Vl5TmXU7tOERtow/PU6CaLBFTICTqKhe98JCfg3q8Us=       0             1                 0.000000\n",
      "6602751  cggvIKhBJ0fb1alCDHmzMrzvPWxC7hred329XgUAjkE=  WsNwO+hBiGcP1FKgQfhoO8erJhqgvYuyuJ0Dw2A8Sk8=       0             1                 0.000000\n",
      "6602750  cggvIKhBJ0fb1alCDHmzMrzvPWxC7hred329XgUAjkE=  yCV7ZQgiIb1Crmn79V39YCIrHkFsHf3Q5HTdDnKZWRk=       0             1                 0.000000\n",
      "6602749  cggvIKhBJ0fb1alCDHmzMrzvPWxC7hred329XgUAjkE=  UPGyJ0SaYwfnXqbHDD/rBhpdchuUvXCgNsblwJk5B5o=       0             1                 0.000000\n",
      "6602748  cggvIKhBJ0fb1alCDHmzMrzvPWxC7hred329XgUAjkE=  J/CXt5DEQzK3m+UDVoeMxXoXHkNeuhht5XnJVUh3LcI=       0             1                 0.000000\n",
      "5668742  mcDWvT8WPKzS87d3KB+QZ6poNnNYldBrJoaFwOfpyNI=  TPplpewPCyMdjIWSeYt3m5hmf7hM8UhVIHadVVQnQ8g=       1             1                 1.000000\n",
      "\n",
      "[7347972 rows x 5 columns]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                                 msno                                       song_id  target  total_rating  fractional_rating_count\n",
      "0        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  BBzumQNXUHKdEBOB7mAJuzok+IJA1c2Ryg/yzTF6tik=       1          2791                 0.000358\n",
      "1        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  3qm6XTZ6MOCU11x8FIVbAGH5l5uMkT3/ZalWG1oo2Gc=       1          2791                 0.000358\n",
      "2        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  3Hg5kugV1S0wzEVLAEfqjIV5UHzb7bCrdBRQlGygLvU=       1          2791                 0.000358\n",
      "3        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  bPIvRTzfHxH5LgHrStll+tYwSQNVV8PySgA3M1PfTgc=       1          2791                 0.000358\n",
      "4        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  XUz4Z4wPPChz+OIwkwj7HJ8teIjW3rEMdEATM80rDxM=       1          2791                 0.000358\n",
      "5        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  +h+uIXLQyXHuCdwAY5oT3T302vt0IdQJtux0APXhriw=       1          2791                 0.000358\n",
      "6        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  +LztcJcPEEwsikk6+K5udm06XJQMzR4+lzavKLUyE0k=       1          2791                 0.000358\n",
      "7        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  rdNg0FiLBStCei8h1/h2kwv8IS97ZgONQkb/kXGAvqM=       1          2791                 0.000358\n",
      "8        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  WRuHbl8CJbAuzUb4f/88Ly6IXif+IVVWolkp33Nm8wI=       1          2791                 0.000358\n",
      "9        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  NWrFxxzjzsSolyGnnLWlkRrMUIzbxhKesaJHNOp8e80=       1          2791                 0.000358\n",
      "10       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  eY3mod6Vs39GLjL86RwYG+6jDkjYUB5AGjbPEuqJmQM=       1          2791                 0.000358\n",
      "11       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  nt4dYk1vQyKPzun5YtaRpLB9xWOWIU3QKiZV3aTOJYs=       1          2791                 0.000358\n",
      "12       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  BnjOmObP+8X8Yxk3tPHy1309L++jhPWBFD8QXCYJsT4=       1          2791                 0.000358\n",
      "13       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  EUEMfptvZVeqhSQI6PzZeE/dX34a4fE1AnbJNyuDPjk=       1          2791                 0.000358\n",
      "14       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  yPMxRWxhG2gZj2BV1mzTW/faIoigIh1F9gWtXBslIOg=       1          2791                 0.000358\n",
      "15       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  e8qPmXjLP6RwXiYsrQB2Z3gRSS34LNCoYFR7iYznvOs=       1          2791                 0.000358\n",
      "16       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  YK5U5FaNxjp4ICossGxITj7uxPEBSOrzD/ogfvujcXs=       1          2791                 0.000358\n",
      "17       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  NOZGiW7PvfAlCrlciaZpmYMpXM8OiJMnLjqUfna7vQM=       1          2791                 0.000358\n",
      "18       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  IKMFuL0f5Y8c63Hg9BXkeNJjE0z8yf3gMt/tOxF4QNE=       1          2791                 0.000358\n",
      "19       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  81mK2AN53Wxo7lKT15YL1lcCYdwmuZ/cSnmDiuwG+Uc=       1          2791                 0.000358\n",
      "20       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  p9Vg2zk7w856BnMwxQYfqHp/DR7T+8TLqA6ZdA7s+MA=       1          2791                 0.000358\n",
      "21       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  +lmBkKRxox8E2SdS6gdyU+DaGpf5DfNT94qyGo+udPU=       1          2791                 0.000358\n",
      "22       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  RLpgMQTJp+tVSJrTH+0CiQXFqDYiTA/6XFcudU8r/gg=       1          2791                 0.000358\n",
      "23       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  koF2RGY1ZmpeylEgkSIEl+ee4Cs7F3Vkf6PnV/gHkb8=       1          2791                 0.000358\n",
      "24       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  R335aDVP9ZQtRVczBZsL42tzPTruFj6cdmZwpjX/U9A=       1          2791                 0.000358\n",
      "25       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  q3ciI9hdndWhjjP0t6aEUtGTSfwOYH5WwUi6DTXY+Vs=       1          2791                 0.000358\n",
      "26       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  WSBiRTkhNB9UjO9pP6a+NrD8UnkaKq9VHPFlSHuMVSE=       0          2791                 0.000000\n",
      "27       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  vKnUal7UGXSL5tJiSMCvd9tZzQxUccL5flhLugrLCaA=       1          2791                 0.000358\n",
      "28       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  GSZ0gIf6RxyHDRCSxVU4MAzzOTCikoGY9Cp1pI6xiJo=       1          2791                 0.000358\n",
      "29       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  Ru7n8Xw2s8LGDsgDhyzWqCWQRWQW9KNPY9qMOFAf5x0=       1          2791                 0.000358\n",
      "...                                               ...                                           ...     ...           ...                      ...\n",
      "7377385  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  zGPSre/5uRjXC0Tk0Jg6k0y4pZ/jITGi+5KOZ1qTCLE=       0             6                 0.000000\n",
      "7377386  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  pNFxumkeGnkpmu5StyoyqtkNFyyDo8dZG3aNKs5Fn0c=       1             6                 0.166667\n",
      "7377387  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  FX3h9BKmJRKks6I/fxWC59nUDa4KSWyuFTnusRhR6kU=       1             6                 0.166667\n",
      "7377388  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  as7bXjxE++S9bt2J+xLlO3zebSQf/xWpP3ayX36RPsg=       1             6                 0.166667\n",
      "7377389  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  +qUbpr7twjsFKXd4DdV7q+XzkAMOCSSWlLh9T4/qH7I=       0             6                 0.000000\n",
      "7377390  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  Irmfofy3g8p4ZpkBK+o+bcixbsNJ5tqlm/GJYYrDqGM=       0             6                 0.000000\n",
      "7377391  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  nL4V8M8ebB4nSZBL2WUflU4gAWKzc+48Rbh8Y8s80E8=       0             6                 0.000000\n",
      "7377392  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  DWwk7s1MeQc0W8lBVZciEokgrQ/Pfloy4ofEgchCiwc=       0             6                 0.000000\n",
      "7377393  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  nO6o2gW+V7ZamFfzdbbchzESnOE3vFOTZkCvYd7eJd0=       1             8                 0.125000\n",
      "7377394  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  Xpjwi8UAE2Vv9PZ6cZnhc58MCtl3cKZEO1sdAkqJ4mo=       1             8                 0.125000\n",
      "7377395  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  uzU6ntNZT60j0CqFNXTpF6Oo8W3w5UwU/ANEODidW04=       1             8                 0.125000\n",
      "7377396  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  8YUHzpRNxYkj2y99bmbmCI7tQUgESdRzTThSPovZ3Zw=       0             8                 0.000000\n",
      "7377397  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  XkTE3XVEfeBh/AYg2DB4zi/GXLKZl7w1UNqT5X2AK8E=       1             8                 0.125000\n",
      "7377398  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  iEQA6d2yx8JtQLqT5wVEgWgSQuN+p6lUQsGJx4FrC2A=       1             8                 0.125000\n",
      "7377399  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  podlycp3c6tOEDw7q8iQBVgvFjiMpkWWXTlXljGdbTY=       1             8                 0.125000\n",
      "7377400  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  z5ps7i1JPniXcrpFgXsdNTpcnT15e1PNUjhojniJq4c=       1             8                 0.125000\n",
      "7377401  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  ayL6eLHKSrmX4smFi1KbGMYrWI9EwsiErGjWUJ9CTrQ=       1             8                 0.125000\n",
      "7377402  PpGtbkm587Wdk+87fog2IS/1LTP9gNRpOdyCLY/Bm4I=  lp7zKhZ+otpqMRcJ5Lk6p2Kc3BStTGgZKtNRPLMEDqw=       0             1                 0.000000\n",
      "7377403  PpGtbkm587Wdk+87fog2IS/1LTP9gNRpOdyCLY/Bm4I=  CsqfeIr3/stOaLTq+Z+Yt6VnPSqPWS2wxxnCKJL86oM=       1             1                 1.000000\n",
      "7377406  4N9yZKANKrn3HQIBhoVhcTEhyyRceozpTcnnqsQ+gss=  0m0HHeb33rsWnLA3N13G2GKPYx3pgBN2JLoutqR8Mi0=       1             2                 0.500000\n",
      "7377407  4N9yZKANKrn3HQIBhoVhcTEhyyRceozpTcnnqsQ+gss=  5veDaknFWh0v1vD1hYS6ormlAuz4lcqgp2QtXnU5jFw=       1             2                 0.500000\n",
      "7377408  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  M9rAajz4dYuRhZ7jLvf9RRayVA3os61X/XXHEuW4giA=       1             3                 0.333333\n",
      "7377409  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  ih58jIWgit7qPy3VpvbM6E7AIvs57ylCHMVpIl1T+Io=       0             3                 0.000000\n",
      "7377410  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  n+FLlTL+LrNTa6xpoGkESEzlUMb/hPEM1nLq2U6fEp0=       0             3                 0.000000\n",
      "7377411  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  0b8M2FLEH176y/ng1rK9ftUYDYnA+1NdoCOUeuyBMtU=       1             3                 0.333333\n",
      "7377412  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  e/upMhPFcXl3Rxq0Us1abr0FcThlZOeqKJhBobmltBU=       0             3                 0.000000\n",
      "7377413  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  yk2hZS4SA2yit4orR06WHYDkkXg5+E77bA7Y533znZ4=       0             3                 0.000000\n",
      "7377414  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  CfOyUSvBAIGhsUEcZ6hTnWLLaQrDtzVmoIdmIMXBOTA=       0             3                 0.000000\n",
      "7377415  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  CsqfeIr3/stOaLTq+Z+Yt6VnPSqPWS2wxxnCKJL86oM=       1             3                 0.333333\n",
      "7377416  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  wxghAK3cq1rb0Kkyx6dSxzKGYadE8TQWF9L6dq9B1Is=       0             3                 0.000000\n",
      "\n",
      "[7347972 rows x 5 columns]\n"
     ]
    }
   ],
   "source": [
    "print(df_train.sort_values(by=['total_rating'],ascending=False))\n",
    "df_train['fractional_rating_count']=df_train['target']/df_train['total_rating']\n",
    "print(df_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "用户数量为： 27113\n",
      "歌曲数量为： 359353\n"
     ]
    }
   ],
   "source": [
    "#所有的用户和item\n",
    "users=df_train['msno'].unique()\n",
    "n_users=len(users)\n",
    "print('用户数量为：',n_users)\n",
    "items=df_train['song_id'].unique()\n",
    "n_items=len(items)\n",
    "print('歌曲数量为：',n_items)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "#存为csv格式\n",
    "feat_names=df_train.columns\n",
    "train1 = pd.DataFrame(columns = feat_names, data = df_train)\n",
    "train1.to_csv('train1.csv',index = False,header=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                                 msno                                       song_id  target  total_rating  fractional_rating_count\n",
      "0        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  BBzumQNXUHKdEBOB7mAJuzok+IJA1c2Ryg/yzTF6tik=       1          2791                 0.000358\n",
      "1        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  3qm6XTZ6MOCU11x8FIVbAGH5l5uMkT3/ZalWG1oo2Gc=       1          2791                 0.000358\n",
      "2        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  3Hg5kugV1S0wzEVLAEfqjIV5UHzb7bCrdBRQlGygLvU=       1          2791                 0.000358\n",
      "3        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  bPIvRTzfHxH5LgHrStll+tYwSQNVV8PySgA3M1PfTgc=       1          2791                 0.000358\n",
      "4        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  XUz4Z4wPPChz+OIwkwj7HJ8teIjW3rEMdEATM80rDxM=       1          2791                 0.000358\n",
      "5        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  +h+uIXLQyXHuCdwAY5oT3T302vt0IdQJtux0APXhriw=       1          2791                 0.000358\n",
      "6        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  +LztcJcPEEwsikk6+K5udm06XJQMzR4+lzavKLUyE0k=       1          2791                 0.000358\n",
      "7        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  rdNg0FiLBStCei8h1/h2kwv8IS97ZgONQkb/kXGAvqM=       1          2791                 0.000358\n",
      "8        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  WRuHbl8CJbAuzUb4f/88Ly6IXif+IVVWolkp33Nm8wI=       1          2791                 0.000358\n",
      "9        FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  NWrFxxzjzsSolyGnnLWlkRrMUIzbxhKesaJHNOp8e80=       1          2791                 0.000358\n",
      "10       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  eY3mod6Vs39GLjL86RwYG+6jDkjYUB5AGjbPEuqJmQM=       1          2791                 0.000358\n",
      "11       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  nt4dYk1vQyKPzun5YtaRpLB9xWOWIU3QKiZV3aTOJYs=       1          2791                 0.000358\n",
      "12       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  BnjOmObP+8X8Yxk3tPHy1309L++jhPWBFD8QXCYJsT4=       1          2791                 0.000358\n",
      "13       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  EUEMfptvZVeqhSQI6PzZeE/dX34a4fE1AnbJNyuDPjk=       1          2791                 0.000358\n",
      "14       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  yPMxRWxhG2gZj2BV1mzTW/faIoigIh1F9gWtXBslIOg=       1          2791                 0.000358\n",
      "15       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  e8qPmXjLP6RwXiYsrQB2Z3gRSS34LNCoYFR7iYznvOs=       1          2791                 0.000358\n",
      "16       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  YK5U5FaNxjp4ICossGxITj7uxPEBSOrzD/ogfvujcXs=       1          2791                 0.000358\n",
      "17       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  NOZGiW7PvfAlCrlciaZpmYMpXM8OiJMnLjqUfna7vQM=       1          2791                 0.000358\n",
      "18       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  IKMFuL0f5Y8c63Hg9BXkeNJjE0z8yf3gMt/tOxF4QNE=       1          2791                 0.000358\n",
      "19       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  81mK2AN53Wxo7lKT15YL1lcCYdwmuZ/cSnmDiuwG+Uc=       1          2791                 0.000358\n",
      "20       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  p9Vg2zk7w856BnMwxQYfqHp/DR7T+8TLqA6ZdA7s+MA=       1          2791                 0.000358\n",
      "21       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  +lmBkKRxox8E2SdS6gdyU+DaGpf5DfNT94qyGo+udPU=       1          2791                 0.000358\n",
      "22       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  RLpgMQTJp+tVSJrTH+0CiQXFqDYiTA/6XFcudU8r/gg=       1          2791                 0.000358\n",
      "23       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  koF2RGY1ZmpeylEgkSIEl+ee4Cs7F3Vkf6PnV/gHkb8=       1          2791                 0.000358\n",
      "24       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  R335aDVP9ZQtRVczBZsL42tzPTruFj6cdmZwpjX/U9A=       1          2791                 0.000358\n",
      "25       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  q3ciI9hdndWhjjP0t6aEUtGTSfwOYH5WwUi6DTXY+Vs=       1          2791                 0.000358\n",
      "26       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  WSBiRTkhNB9UjO9pP6a+NrD8UnkaKq9VHPFlSHuMVSE=       0          2791                 0.000000\n",
      "27       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  vKnUal7UGXSL5tJiSMCvd9tZzQxUccL5flhLugrLCaA=       1          2791                 0.000358\n",
      "28       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  GSZ0gIf6RxyHDRCSxVU4MAzzOTCikoGY9Cp1pI6xiJo=       1          2791                 0.000358\n",
      "29       FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=  Ru7n8Xw2s8LGDsgDhyzWqCWQRWQW9KNPY9qMOFAf5x0=       1          2791                 0.000358\n",
      "...                                               ...                                           ...     ...           ...                      ...\n",
      "7377385  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  zGPSre/5uRjXC0Tk0Jg6k0y4pZ/jITGi+5KOZ1qTCLE=       0             6                 0.000000\n",
      "7377386  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  pNFxumkeGnkpmu5StyoyqtkNFyyDo8dZG3aNKs5Fn0c=       1             6                 0.166667\n",
      "7377387  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  FX3h9BKmJRKks6I/fxWC59nUDa4KSWyuFTnusRhR6kU=       1             6                 0.166667\n",
      "7377388  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  as7bXjxE++S9bt2J+xLlO3zebSQf/xWpP3ayX36RPsg=       1             6                 0.166667\n",
      "7377389  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  +qUbpr7twjsFKXd4DdV7q+XzkAMOCSSWlLh9T4/qH7I=       0             6                 0.000000\n",
      "7377390  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  Irmfofy3g8p4ZpkBK+o+bcixbsNJ5tqlm/GJYYrDqGM=       0             6                 0.000000\n",
      "7377391  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  nL4V8M8ebB4nSZBL2WUflU4gAWKzc+48Rbh8Y8s80E8=       0             6                 0.000000\n",
      "7377392  WnPcJ98nXHHrjEiFtvT6eqcmq/smh9SEUd+W8aSdLuU=  DWwk7s1MeQc0W8lBVZciEokgrQ/Pfloy4ofEgchCiwc=       0             6                 0.000000\n",
      "7377393  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  nO6o2gW+V7ZamFfzdbbchzESnOE3vFOTZkCvYd7eJd0=       1             8                 0.125000\n",
      "7377394  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  Xpjwi8UAE2Vv9PZ6cZnhc58MCtl3cKZEO1sdAkqJ4mo=       1             8                 0.125000\n",
      "7377395  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  uzU6ntNZT60j0CqFNXTpF6Oo8W3w5UwU/ANEODidW04=       1             8                 0.125000\n",
      "7377396  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  8YUHzpRNxYkj2y99bmbmCI7tQUgESdRzTThSPovZ3Zw=       0             8                 0.000000\n",
      "7377397  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  XkTE3XVEfeBh/AYg2DB4zi/GXLKZl7w1UNqT5X2AK8E=       1             8                 0.125000\n",
      "7377398  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  iEQA6d2yx8JtQLqT5wVEgWgSQuN+p6lUQsGJx4FrC2A=       1             8                 0.125000\n",
      "7377399  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  podlycp3c6tOEDw7q8iQBVgvFjiMpkWWXTlXljGdbTY=       1             8                 0.125000\n",
      "7377400  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  z5ps7i1JPniXcrpFgXsdNTpcnT15e1PNUjhojniJq4c=       1             8                 0.125000\n",
      "7377401  c5ulAneqUy/LGTv/XZzr7pRBoEBQ+QNezsyOMKnmApY=  ayL6eLHKSrmX4smFi1KbGMYrWI9EwsiErGjWUJ9CTrQ=       1             8                 0.125000\n",
      "7377402  PpGtbkm587Wdk+87fog2IS/1LTP9gNRpOdyCLY/Bm4I=  lp7zKhZ+otpqMRcJ5Lk6p2Kc3BStTGgZKtNRPLMEDqw=       0             1                 0.000000\n",
      "7377403  PpGtbkm587Wdk+87fog2IS/1LTP9gNRpOdyCLY/Bm4I=  CsqfeIr3/stOaLTq+Z+Yt6VnPSqPWS2wxxnCKJL86oM=       1             1                 1.000000\n",
      "7377406  4N9yZKANKrn3HQIBhoVhcTEhyyRceozpTcnnqsQ+gss=  0m0HHeb33rsWnLA3N13G2GKPYx3pgBN2JLoutqR8Mi0=       1             2                 0.500000\n",
      "7377407  4N9yZKANKrn3HQIBhoVhcTEhyyRceozpTcnnqsQ+gss=  5veDaknFWh0v1vD1hYS6ormlAuz4lcqgp2QtXnU5jFw=       1             2                 0.500000\n",
      "7377408  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  M9rAajz4dYuRhZ7jLvf9RRayVA3os61X/XXHEuW4giA=       1             3                 0.333333\n",
      "7377409  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  ih58jIWgit7qPy3VpvbM6E7AIvs57ylCHMVpIl1T+Io=       0             3                 0.000000\n",
      "7377410  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  n+FLlTL+LrNTa6xpoGkESEzlUMb/hPEM1nLq2U6fEp0=       0             3                 0.000000\n",
      "7377411  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  0b8M2FLEH176y/ng1rK9ftUYDYnA+1NdoCOUeuyBMtU=       1             3                 0.333333\n",
      "7377412  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  e/upMhPFcXl3Rxq0Us1abr0FcThlZOeqKJhBobmltBU=       0             3                 0.000000\n",
      "7377413  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  yk2hZS4SA2yit4orR06WHYDkkXg5+E77bA7Y533znZ4=       0             3                 0.000000\n",
      "7377414  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  CfOyUSvBAIGhsUEcZ6hTnWLLaQrDtzVmoIdmIMXBOTA=       0             3                 0.000000\n",
      "7377415  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  CsqfeIr3/stOaLTq+Z+Yt6VnPSqPWS2wxxnCKJL86oM=       1             3                 0.333333\n",
      "7377416  pSXUjXoK/eKqB0much1HvcBsOUTxPiCaC8WYUT/9fxg=  wxghAK3cq1rb0Kkyx6dSxzKGYadE8TQWF9L6dq9B1Is=       0             3                 0.000000\n",
      "\n",
      "[7347972 rows x 5 columns]\n"
     ]
    }
   ],
   "source": [
    "print(train1)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
